PPG Signal Processing for the Goldilocks ppg files

In [1]:
# Initialize the package
import sys

repo = '/root/biosignal_processing_package'
sys.path.insert(1, repo)
sys.path.insert(1, repo + '/biosignal_processing_package' )

Import the package functions

In [2]:
import pandas as pd
import numpy as np

# package functions
from preprocessing.signal_transform import signal_downsample, segment_signal 
from preprocessing.signal_filter import filter_ppg
from plotting.signal_plots import plot_signal
from plotting.signal_plots import plot_filtered
from savvyppg import ppg_module
In /opt/anaconda/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /opt/anaconda/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /opt/anaconda/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In /opt/anaconda/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /opt/anaconda/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The savefig.jpeg_quality rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /opt/anaconda/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The keymap.all_axes rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /opt/anaconda/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The animation.avconv_path rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /opt/anaconda/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The animation.avconv_args rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
WARNING:tensorflow:From /opt/anaconda/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/compat/v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term

Reading and importing the signals

In [3]:
# read files
files = [
    #Signals we can produce output without obvious errors from:
    "0-VagalTone-1619468340-ppg.csv.gz",
    "0-VagalTone-1619541388-ppg.csv.gz",
    "4807805364203339305-RelaxGame-1619035664-ppg.csv.gz",
    "5118090038914056700-RelaxGame-1619042816-ppg.csv.gz",

    #Datasets with significant movement noise:
    "4835847629383064719-RelaxGame-1619041516-ppg.csv.gz",
    "5448292036127487645-Pizza-1619546545-ppg.csv.gz",
    "5359311199602188842-Potions-1619556392-ppg.csv.gz",
    "5359311199602188842-VagalTone-1619557200-ppg.csv.gz",
    "4650373652542159307-VagalTone-1619464304-ppg.csv.gz",
    "0-Potions-1622059739-ppg.csv.gz"
]

signals = []
for file in files:
    filepath = repo + "/data/" + file
    signal = pd.read_csv(filepath, compression = 'gzip')
    signal.columns = [col.strip() for col in signal.columns] # strip away spaces from col names
    signals.append(signal)

Inspect the structure of the data and estimate the sampling rate of the signal

In [4]:
signal = signals[3]
print(signal.shape)
display(signal.head())
sampling_rate = len(signal)/(signal.ppg_time[len(signal)-2])
print("sampling rate: " + str(sampling_rate))
(12675, 5)
ppg_time ppg0 ppg1 ppg2 ambient
0 0.000000 327663 341151 293754 163
1 0.007407 327639 341188 293732 219
2 0.014815 327665 341179 293704 206
3 0.022222 327639 341211 293684 179
4 0.029630 327651 341239 293757 217
sampling rate: 135.03119457114914
In [5]:
# comparing available signals
average_signal = (signal.ppg0+signal.ppg1+signal.ppg2)/3
plot_signal(
     [signal.ppg0.to_list(),signal.ppg1.to_list(),signal.ppg2.to_list(),average_signal.to_list()],
     [135,135,135,135], 
     labels=["ppg0","ppg1","ppg2","average"],
     x_axis_label = "Time (s)"
)
INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.

Visualizing all the signals

In [6]:
# visualizing all sets of signals
from preprocessing.signal_transform import signal_downsample
n= 0
for signal in signals:
    #downsample for plotting
    print(files[n])
    plot_signal(
         [
             signal_downsample(signal.ppg0.to_list(), 135, downsample_factor = 3),
             signal_downsample(signal.ppg1.to_list(), 135, downsample_factor = 3),
             signal_downsample(signal.ppg2.to_list(), 135, downsample_factor = 3)
         ],
         [135/3,135/3,135/3], 
         labels=["ppg0","ppg1","ppg2"],
         x_axis_label = "Time (s)"
    )
    n = n + 1
0-VagalTone-1619468340-ppg.csv.gz
/root/biosignal_processing_package/biosignal_processing_package/preprocessing/signal_transform.py:65: UserWarning: Downsample factor returns a non-integer sample points, setting a new downsample factor
  warnings.warn(f"Downsample factor returns a non-integer sample points, setting a new downsample factor")
0-VagalTone-1619541388-ppg.csv.gz
4807805364203339305-RelaxGame-1619035664-ppg.csv.gz
5118090038914056700-RelaxGame-1619042816-ppg.csv.gz
4835847629383064719-RelaxGame-1619041516-ppg.csv.gz
5448292036127487645-Pizza-1619546545-ppg.csv.gz
5359311199602188842-Potions-1619556392-ppg.csv.gz
5359311199602188842-VagalTone-1619557200-ppg.csv.gz
4650373652542159307-VagalTone-1619464304-ppg.csv.gz
0-Potions-1622059739-ppg.csv.gz

Signal Processing

Signal Filtering

In [7]:
# make list of only ppg0 signals
ppg0_list = [signal.ppg0.to_list() for signal in signals]
sr = 135
# actual cleaning
ppg0_clean = [filter_ppg(ppg0_sig, sr) for ppg0_sig in ppg0_list]

Signal Cleaning Inspection

In [8]:
# comparing raw and clean signals, signals are downsampled for visualization
for i in range(len(signals)):
    print(files[i])
    plot_filtered(
        signal_downsample(ppg0_list[i],135,downsample_factor = 3),
        signal_downsample(ppg0_clean[i],135,downsample_factor = 3),
        sr/3
    )
0-VagalTone-1619468340-ppg.csv.gz
0-VagalTone-1619541388-ppg.csv.gz
4807805364203339305-RelaxGame-1619035664-ppg.csv.gz
5118090038914056700-RelaxGame-1619042816-ppg.csv.gz
4835847629383064719-RelaxGame-1619041516-ppg.csv.gz
5448292036127487645-Pizza-1619546545-ppg.csv.gz
5359311199602188842-Potions-1619556392-ppg.csv.gz
5359311199602188842-VagalTone-1619557200-ppg.csv.gz
4650373652542159307-VagalTone-1619464304-ppg.csv.gz
0-Potions-1622059739-ppg.csv.gz

Signal Quality Inspection

In [9]:
# Preprocess and inspect beat level quality of each ppg signal
savvyppg_signals = []
for i in range(len(signals)):
    print("Processing " + files[i])
    sig = ppg_module.ppgSignal(ppg0_list[i],sr, predict_beats=True)
    savvyppg_signals.append(sig)
Processing 0-VagalTone-1619468340-ppg.csv.gz
Processing 0-VagalTone-1619541388-ppg.csv.gz
Processing 4807805364203339305-RelaxGame-1619035664-ppg.csv.gz
Processing 5118090038914056700-RelaxGame-1619042816-ppg.csv.gz
Processing 4835847629383064719-RelaxGame-1619041516-ppg.csv.gz
Processing 5448292036127487645-Pizza-1619546545-ppg.csv.gz
Processing 5359311199602188842-Potions-1619556392-ppg.csv.gz
Processing 5359311199602188842-VagalTone-1619557200-ppg.csv.gz
Processing 4650373652542159307-VagalTone-1619464304-ppg.csv.gz
Processing 0-Potions-1622059739-ppg.csv.gz
In [10]:
# Visualize the beat level signal quality
from plotting.ppg_plotting import ppg_plot_quality
for i in range(len(signals)):
    print(files[i])
    sqi = savvyppg_signals[i].beats_df.pred_label.sum()/savvyppg_signals[i].beats_num
    print("Proportion of good beats/total detected beats: ", sqi * 100)
    ppg_plot_quality((savvyppg_signals[i]).data_ppg,(savvyppg_signals[i]).beats_df,sr)
0-VagalTone-1619468340-ppg.csv.gz
Proportion of good beats/total detected beats:  75.4054054054054
0-VagalTone-1619541388-ppg.csv.gz
Proportion of good beats/total detected beats:  78.21100917431193
4807805364203339305-RelaxGame-1619035664-ppg.csv.gz
Proportion of good beats/total detected beats:  63.38028169014085
5118090038914056700-RelaxGame-1619042816-ppg.csv.gz
Proportion of good beats/total detected beats:  46.15384615384615
4835847629383064719-RelaxGame-1619041516-ppg.csv.gz
Proportion of good beats/total detected beats:  52.991452991452995
5448292036127487645-Pizza-1619546545-ppg.csv.gz
Proportion of good beats/total detected beats:  57.40432612312812
5359311199602188842-Potions-1619556392-ppg.csv.gz
Proportion of good beats/total detected beats:  11.645569620253164
5359311199602188842-VagalTone-1619557200-ppg.csv.gz
Proportion of good beats/total detected beats:  37.31617647058824
4650373652542159307-VagalTone-1619464304-ppg.csv.gz
Proportion of good beats/total detected beats:  10.975609756097562
0-Potions-1622059739-ppg.csv.gz
Proportion of good beats/total detected beats:  4.761904761904762